From fdc7fcd42028f22783ac59e166358c1deb82702d Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Tue, 22 Jun 2004 15:40:04 +0000 Subject: [PATCH] bitkeeper revision 1.994.1.6 (40d852d480GZ_R_XEId8Zw3_6I_0lA) Various cleanups. Get rid of zombie page state. --- tools/xc/lib/xc_domain.c | 12 +- tools/xc/py/Xc.c | 2 +- xen/common/dom_mem_ops.c | 20 +-- xen/common/domain.c | 168 ++++++++++-------------- xen/common/event_channel.c | 4 +- xen/common/keyhandler.c | 21 +-- xen/common/memory.c | 202 +++++++++++++++++------------ xen/drivers/char/console.c | 4 +- xen/include/asm-x86/spinlock.h | 227 +++++++++++---------------------- xen/include/xen/mm.h | 20 +-- xen/include/xen/sched.h | 8 +- xen/include/xen/spinlock.h | 154 +++++++--------------- 12 files changed, 348 insertions(+), 494 deletions(-) diff --git a/tools/xc/lib/xc_domain.c b/tools/xc/lib/xc_domain.c index ae4dff003d..4d173bfd89 100644 --- a/tools/xc/lib/xc_domain.c +++ b/tools/xc/lib/xc_domain.c @@ -92,12 +92,12 @@ int xc_domain_getinfo(int xc_handle, info->cpu = (op.u.getdomaininfo.flags>>DOMFLAGS_CPUSHIFT) & DOMFLAGS_CPUMASK; - info->dying = (op.u.getdomaininfo.flags & DOMFLAGS_DYING); - info->crashed = (op.u.getdomaininfo.flags & DOMFLAGS_CRASHED); - info->shutdown = (op.u.getdomaininfo.flags & DOMFLAGS_SHUTDOWN); - info->paused = (op.u.getdomaininfo.flags & DOMFLAGS_PAUSED); - info->blocked = (op.u.getdomaininfo.flags & DOMFLAGS_BLOCKED); - info->running = (op.u.getdomaininfo.flags & DOMFLAGS_RUNNING); + info->dying = !!(op.u.getdomaininfo.flags & DOMFLAGS_DYING); + info->crashed = !!(op.u.getdomaininfo.flags & DOMFLAGS_CRASHED); + info->shutdown = !!(op.u.getdomaininfo.flags & DOMFLAGS_SHUTDOWN); + info->paused = !!(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED); + info->blocked = !!(op.u.getdomaininfo.flags & DOMFLAGS_BLOCKED); + info->running = !!(op.u.getdomaininfo.flags & DOMFLAGS_RUNNING); info->shutdown_reason = (op.u.getdomaininfo.flags>>DOMFLAGS_SHUTDOWNSHIFT) & diff --git 
a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c index 1c6a90c1b4..299202609e 100644 --- a/tools/xc/py/Xc.c +++ b/tools/xc/py/Xc.c @@ -1011,7 +1011,7 @@ static PyMethodDef pyxc_methods[] = { (PyCFunction)pyxc_domain_create, METH_VARARGS | METH_KEYWORDS, "\n" "Create a new domain.\n" - " mem_kb [int, 65536]: Memory allocation, in kilobytes.\n" + " mem_kb [int, 0]: Memory allocation, in kilobytes.\n" " name [str, '(anon)']: Informative textual name.\n\n" "Returns: [int] new domain identifier; -1 on error.\n" }, diff --git a/xen/common/dom_mem_ops.c b/xen/common/dom_mem_ops.c index 0927efe3a3..8506a5ba48 100644 --- a/xen/common/dom_mem_ops.c +++ b/xen/common/dom_mem_ops.c @@ -15,9 +15,9 @@ #include #include -static long alloc_dom_mem(struct domain *p, - unsigned long *pages, - unsigned long nr_pages) +static long alloc_dom_mem(struct domain *d, + unsigned long *pages, + unsigned long nr_pages) { struct pfn_info *page; unsigned long i; @@ -35,7 +35,7 @@ static long alloc_dom_mem(struct domain *p, for ( i = 0; i < nr_pages; i++ ) { /* NB. 'alloc_domain_page' does limit-checking on pages per domain. 
*/ - if ( unlikely((page = alloc_domain_page(p)) == NULL) ) + if ( unlikely((page = alloc_domain_page(d)) == NULL) ) { DPRINTK("Could not allocate a frame\n"); break; @@ -49,9 +49,9 @@ static long alloc_dom_mem(struct domain *p, return i; } -static long free_dom_mem(struct domain *p, - unsigned long *pages, - unsigned long nr_pages) +static long free_dom_mem(struct domain *d, + unsigned long *pages, + unsigned long nr_pages) { struct pfn_info *page; unsigned long i, mpfn; @@ -65,15 +65,15 @@ static long free_dom_mem(struct domain *p, if ( unlikely(mpfn >= max_page) ) { DPRINTK("Domain %u page number out of range (%08lx>=%08lx)\n", - p->domain, mpfn, max_page); + d->domain, mpfn, max_page); rc = -EINVAL; break; } page = &frame_table[mpfn]; - if ( unlikely(!get_page(page, p)) ) + if ( unlikely(!get_page(page, d)) ) { - DPRINTK("Bad page free for domain %u\n", p->domain); + DPRINTK("Bad page free for domain %u\n", d->domain); rc = -EINVAL; break; } diff --git a/xen/common/domain.c b/xen/common/domain.c index 97de78159e..a50bc24bad 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -175,6 +175,7 @@ void domain_kill(struct domain *d) if ( !test_and_set_bit(DF_DYING, &d->flags) ) { sched_rem_domain(d); + domain_relinquish_memory(d); put_domain(d); } } @@ -215,7 +216,7 @@ void domain_shutdown(u8 reason) __enter_scheduler(); } -struct pfn_info *alloc_domain_page(struct domain *p) +struct pfn_info *alloc_domain_page(struct domain *d) { struct pfn_info *page = NULL; unsigned long flags, mask, pfn_stamp, cpu_stamp; @@ -255,23 +256,24 @@ struct pfn_info *alloc_domain_page(struct domain *p) } } - page->u.domain = p; + page->u.domain = d; page->type_and_flags = 0; - if ( p != NULL ) + if ( d != NULL ) { wmb(); /* Domain pointer must be visible before updating refcnt. 
*/ - spin_lock(&p->page_list_lock); - if ( unlikely(p->tot_pages >= p->max_pages) ) + spin_lock(&d->page_list_lock); + if ( unlikely(d->tot_pages >= d->max_pages) ) { DPRINTK("Over-allocation for domain %u: %u >= %u\n", - p->domain, p->tot_pages, p->max_pages); - spin_unlock(&p->page_list_lock); + d->domain, d->tot_pages, d->max_pages); + spin_unlock(&d->page_list_lock); goto free_and_exit; } - list_add_tail(&page->list, &p->page_list); - p->tot_pages++; + list_add_tail(&page->list, &d->page_list); page->count_and_flags = PGC_allocated | 1; - spin_unlock(&p->page_list_lock); + if ( unlikely(d->tot_pages++ == 0) ) + get_domain(d); + spin_unlock(&d->page_list_lock); } return page; @@ -287,27 +289,28 @@ struct pfn_info *alloc_domain_page(struct domain *p) void free_domain_page(struct pfn_info *page) { unsigned long flags; - struct domain *p = page->u.domain; + struct domain *d = page->u.domain; ASSERT(!in_irq()); if ( likely(!IS_XEN_HEAP_FRAME(page)) ) { - /* - * No race with setting of zombie bit. If it wasn't set before the - * last reference was dropped, then it can't be set now. - */ page->u.cpu_mask = 0; - if ( !(page->count_and_flags & PGC_zombie) ) + page->tlbflush_timestamp = tlbflush_clock; + if ( likely(d != NULL) ) { - page->tlbflush_timestamp = tlbflush_clock; - if ( likely(p != NULL) ) + page->u.cpu_mask = 1 << d->processor; + /* NB. May recursively lock from domain_relinquish_memory(). */ + spin_lock_recursive(&d->page_list_lock); + list_del(&page->list); + if ( unlikely(--d->tot_pages == 0) ) { - page->u.cpu_mask = 1 << p->processor; - spin_lock(&p->page_list_lock); - list_del(&page->list); - p->tot_pages--; - spin_unlock(&p->page_list_lock); + spin_unlock_recursive(&d->page_list_lock); + put_domain(d); /* Domain 'd' can disappear now. 
*/ + } + else + { + spin_unlock_recursive(&d->page_list_lock); } } @@ -332,13 +335,11 @@ void free_domain_page(struct pfn_info *page) } -void free_all_dom_mem(struct domain *p) +void domain_relinquish_memory(struct domain *d) { - struct list_head *ent, zombies; - struct pfn_info *page; - unsigned long x, y; - - INIT_LIST_HEAD(&zombies); + struct list_head *ent, *tmp; + struct pfn_info *page; + unsigned long x, y; /* * If we're executing the idle task then we may still be running over the @@ -348,51 +349,20 @@ void free_all_dom_mem(struct domain *p) write_ptbase(&current->mm); /* Exit shadow mode before deconstructing final guest page table. */ - if ( p->mm.shadow_mode ) - shadow_mode_disable(p); - - /* STEP 1. Drop the in-use reference to the page-table base. */ - put_page_and_type(&frame_table[pagetable_val(p->mm.pagetable) >> - PAGE_SHIFT]); - - /* STEP 2. Zombify all pages on the domain's allocation list. */ - spin_lock(&p->page_list_lock); - while ( (ent = p->page_list.next) != &p->page_list ) - { - page = list_entry(ent, struct pfn_info, list); - - if ( unlikely(!get_page(page, p)) ) - { - /* - * Another CPU has dropped the last reference and is responsible - * for removing the page from this list. Wait for them to do so. - */ - spin_unlock(&p->page_list_lock); - while ( p->page_list.next == ent ) - barrier(); - spin_lock(&p->page_list_lock); - continue; - } + if ( d->mm.shadow_mode ) + shadow_mode_disable(d); - set_bit(_PGC_zombie, &page->count_and_flags); - - list_del(&page->list); - p->tot_pages--; + /* Drop the in-use reference to the page-table base. */ + if ( pagetable_val(d->mm.pagetable) != 0 ) + put_page_and_type(&frame_table[pagetable_val(d->mm.pagetable) >> + PAGE_SHIFT]); - list_add(&page->list, &zombies); - } - spin_unlock(&p->page_list_lock); - - /* - * STEP 3. With the domain's list lock now released, we examine each zombie - * page and drop references for guest-allocated and/or type-pinned pages. 
- */ - while ( (ent = zombies.next) != &zombies ) + /* Relinquish all pages on the domain's allocation list. */ + spin_lock_recursive(&d->page_list_lock); /* may enter free_domain_page() */ + list_for_each_safe ( ent, tmp, &d->page_list ) { page = list_entry(ent, struct pfn_info, list); - list_del(&page->list); - if ( test_and_clear_bit(_PGC_guest_pinned, &page->count_and_flags) ) put_page_and_type(page); @@ -416,28 +386,27 @@ void free_all_dom_mem(struct domain *p) free_page_type(page, PGT_base_page_table); } while ( unlikely(y != x) ); - - put_page(page); } + spin_unlock_recursive(&d->page_list_lock); } -unsigned int alloc_new_dom_mem(struct domain *p, unsigned int kbytes) +unsigned int alloc_new_dom_mem(struct domain *d, unsigned int kbytes) { unsigned int alloc_pfns, nr_pages; struct pfn_info *page; nr_pages = (kbytes + ((PAGE_SIZE-1)>>10)) >> (PAGE_SHIFT - 10); - p->max_pages = nr_pages; /* this can now be controlled independently */ + d->max_pages = nr_pages; /* this can now be controlled independently */ - /* grow the allocation if necessary */ - for ( alloc_pfns = p->tot_pages; alloc_pfns < nr_pages; alloc_pfns++ ) + /* Grow the allocation if necessary. */ + for ( alloc_pfns = d->tot_pages; alloc_pfns < nr_pages; alloc_pfns++ ) { - if ( unlikely((page=alloc_domain_page(p)) == NULL) || + if ( unlikely((page=alloc_domain_page(d)) == NULL) || unlikely(free_pfns < (SLACK_DOMAIN_MEM_KILOBYTES >> (PAGE_SHIFT-10))) ) { - free_all_dom_mem(p); + domain_relinquish_memory(d); return -ENOMEM; } @@ -447,55 +416,50 @@ unsigned int alloc_new_dom_mem(struct domain *p, unsigned int kbytes) #ifndef NDEBUG { /* Initialise with magic marker if in DEBUG mode. */ - void * a = map_domain_mem( (page-frame_table)<domain, PAGE_SIZE ); - unmap_domain_mem( a ); + void *a = map_domain_mem((page-frame_table)<domain, PAGE_SIZE); + unmap_domain_mem(a); } #endif - } - p->tot_pages = nr_pages; - return 0; } /* Release resources belonging to task @p. 
*/ -void domain_destruct(struct domain *p) +void domain_destruct(struct domain *d) { - struct domain **pp; + struct domain **pd; unsigned long flags; - if ( !test_bit(DF_DYING, &p->flags) ) + if ( !test_bit(DF_DYING, &d->flags) ) BUG(); /* May be already destructed, or get_domain() can race us. */ - if ( cmpxchg(&p->refcnt.counter, 0, DOMAIN_DESTRUCTED) != 0 ) + if ( cmpxchg(&d->refcnt.counter, 0, DOMAIN_DESTRUCTED) != 0 ) return; - DPRINTK("Releasing task %u\n", p->domain); + DPRINTK("Releasing task %u\n", d->domain); /* Delete from task list and task hashtable. */ write_lock_irqsave(&tasklist_lock, flags); - pp = &task_list; - while ( *pp != p ) - pp = &(*pp)->next_list; - *pp = p->next_list; - pp = &task_hash[TASK_HASH(p->domain)]; - while ( *pp != p ) - pp = &(*pp)->next_hash; - *pp = p->next_hash; + pd = &task_list; + while ( *pd != d ) + pd = &(*pd)->next_list; + *pd = d->next_list; + pd = &task_hash[TASK_HASH(d->domain)]; + while ( *pd != d ) + pd = &(*pd)->next_hash; + *pd = d->next_hash; write_unlock_irqrestore(&tasklist_lock, flags); - destroy_event_channels(p); + destroy_event_channels(d); - /* Free all memory associated with this domain. */ - free_page((unsigned long)p->mm.perdomain_pt); - UNSHARE_PFN(virt_to_page(p->shared_info)); - free_all_dom_mem(p); + free_page((unsigned long)d->mm.perdomain_pt); + UNSHARE_PFN(virt_to_page(d->shared_info)); - free_domain_struct(p); + free_domain_struct(d); } diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index 09c13ddb10..813519c923 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -91,7 +91,7 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) } /* Avoid deadlock by first acquiring lock of domain with smaller id. 
*/ - if ( dom1 < dom2 ) + if ( d1 < d2 ) { spin_lock(&d1->event_channel_lock); spin_lock(&d2->event_channel_lock); @@ -271,7 +271,7 @@ static long __evtchn_close(struct domain *d1, int port1) goto out; } - if ( d1->domain < d2->domain ) + if ( d1 < d2 ) { spin_lock(&d2->event_channel_lock); } diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c index 1626603266..0b7b9b3a1e 100644 --- a/xen/common/keyhandler.c +++ b/xen/common/keyhandler.c @@ -71,27 +71,28 @@ static void halt_machine(u_char key, void *dev_id, struct pt_regs *regs) void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs) { - unsigned long flags; - struct domain *p; - shared_info_t *s; - s_time_t now = NOW(); + unsigned long flags; + struct domain *d; + shared_info_t *s; + s_time_t now = NOW(); printk("'%c' pressed -> dumping task queues (now=0x%X:%08X)\n", key, (u32)(now>>32), (u32)now); read_lock_irqsave(&tasklist_lock, flags); - for_each_domain ( p ) + for_each_domain ( d ) { - printk("Xen: DOM %u, CPU %d [has=%c]\n", - p->domain, p->processor, - test_bit(DF_RUNNING, &p->flags) ? 'T':'F'); - s = p->shared_info; + printk("Xen: DOM %u, CPU %d [has=%c] refcnt=%d nr_pages=%d\n", + d->domain, d->processor, + test_bit(DF_RUNNING, &d->flags) ? 
'T':'F', + atomic_read(&d->refcnt), d->tot_pages); + s = d->shared_info; printk("Guest: upcall_pend = %02x, upcall_mask = %02x\n", s->vcpu_data[0].evtchn_upcall_pending, s->vcpu_data[0].evtchn_upcall_mask); printk("Notifying guest...\n"); - send_guest_virq(p, VIRQ_DEBUG); + send_guest_virq(d, VIRQ_DEBUG); } read_unlock_irqrestore(&tasklist_lock, flags); diff --git a/xen/common/memory.c b/xen/common/memory.c index 088f82d2ff..9f7337f85d 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -151,10 +151,10 @@ static int alloc_l2_table(struct pfn_info *page); static int alloc_l1_table(struct pfn_info *page); -static int get_page_from_pagenr(unsigned long page_nr, struct domain *p); +static int get_page_from_pagenr(unsigned long page_nr, struct domain *d); static int get_page_and_type_from_pagenr(unsigned long page_nr, u32 type, - struct domain *p); + struct domain *d); static void free_l2_table(struct pfn_info *page); static void free_l1_table(struct pfn_info *page); @@ -241,35 +241,35 @@ void add_to_domain_alloc_list(unsigned long ps, unsigned long pe) spin_unlock_irqrestore(&free_list_lock, flags); } -static void __invalidate_shadow_ldt(struct domain *p) +static void __invalidate_shadow_ldt(struct domain *d) { int i; unsigned long pfn; struct pfn_info *page; - p->mm.shadow_ldt_mapcnt = 0; + d->mm.shadow_ldt_mapcnt = 0; for ( i = 16; i < 32; i++ ) { - pfn = l1_pgentry_to_pagenr(p->mm.perdomain_pt[i]); + pfn = l1_pgentry_to_pagenr(d->mm.perdomain_pt[i]); if ( pfn == 0 ) continue; - p->mm.perdomain_pt[i] = mk_l1_pgentry(0); - page = frame_table + pfn; + d->mm.perdomain_pt[i] = mk_l1_pgentry(0); + page = &frame_table[pfn]; ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page); - ASSERT_PAGE_IS_DOMAIN(page, p); + ASSERT_PAGE_IS_DOMAIN(page, d); put_page_and_type(page); } /* Dispose of the (now possibly invalid) mappings from the TLB. 
*/ - percpu_info[p->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT; + percpu_info[d->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT; } static inline void invalidate_shadow_ldt(void) { - struct domain *p = current; - if ( p->mm.shadow_ldt_mapcnt != 0 ) - __invalidate_shadow_ldt(p); + struct domain *d = current; + if ( d->mm.shadow_ldt_mapcnt != 0 ) + __invalidate_shadow_ldt(d); } @@ -294,28 +294,28 @@ int alloc_segdesc_page(struct pfn_info *page) /* Map shadow page at offset @off. */ int map_ldt_shadow_page(unsigned int off) { - struct domain *p = current; + struct domain *d = current; unsigned long l1e; if ( unlikely(in_irq()) ) BUG(); - __get_user(l1e, (unsigned long *)&linear_pg_table[(p->mm.ldt_base >> + __get_user(l1e, (unsigned long *)&linear_pg_table[(d->mm.ldt_base >> PAGE_SHIFT) + off]); if ( unlikely(!(l1e & _PAGE_PRESENT)) || unlikely(!get_page_and_type(&frame_table[l1e >> PAGE_SHIFT], - p, PGT_ldt_page)) ) + d, PGT_ldt_page)) ) return 0; - p->mm.perdomain_pt[off + 16] = mk_l1_pgentry(l1e | _PAGE_RW); - p->mm.shadow_ldt_mapcnt++; + d->mm.perdomain_pt[off + 16] = mk_l1_pgentry(l1e | _PAGE_RW); + d->mm.shadow_ldt_mapcnt++; return 1; } -static int get_page_from_pagenr(unsigned long page_nr, struct domain *p) +static int get_page_from_pagenr(unsigned long page_nr, struct domain *d) { struct pfn_info *page = &frame_table[page_nr]; @@ -325,7 +325,7 @@ static int get_page_from_pagenr(unsigned long page_nr, struct domain *p) return 0; } - if ( unlikely(!get_page(page, p)) ) + if ( unlikely(!get_page(page, d)) ) { MEM_LOG("Could not get page ref for pfn %08lx", page_nr); return 0; @@ -337,11 +337,11 @@ static int get_page_from_pagenr(unsigned long page_nr, struct domain *p) static int get_page_and_type_from_pagenr(unsigned long page_nr, u32 type, - struct domain *p) + struct domain *d) { struct pfn_info *page = &frame_table[page_nr]; - if ( unlikely(!get_page_from_pagenr(page_nr, p)) ) + if ( unlikely(!get_page_from_pagenr(page_nr, d)) ) 
return 0; if ( unlikely(!get_page_type(page, type)) ) @@ -412,7 +412,7 @@ static int get_page_from_l1e(l1_pgentry_t l1e) { unsigned long l1v = l1_pgentry_val(l1e); unsigned long pfn = l1_pgentry_to_pagenr(l1e); - extern int domain_iomem_in_pfn(struct domain *p, unsigned long pfn); + extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn); if ( !(l1v & _PAGE_PRESENT) ) return 1; @@ -720,21 +720,11 @@ int alloc_page_type(struct pfn_info *page, unsigned int type) &page->count_and_flags)) ) { struct domain *p = page->u.domain; - mb(); /* Check zombie status before using domain ptr. */ - /* - * NB. 'p' may no longer be valid by time we dereference it, so - * p->processor might be garbage. We clamp it, just in case. - */ - if ( likely(!test_bit(_PGC_zombie, &page->count_and_flags)) ) + if ( unlikely(NEED_FLUSH(tlbflush_time[p->processor], + page->tlbflush_timestamp)) ) { - unsigned int cpu = p->processor; - if ( likely(cpu <= smp_num_cpus) && - unlikely(NEED_FLUSH(tlbflush_time[cpu], - page->tlbflush_timestamp)) ) - { - perfc_incr(need_flush_tlb_flush); - flush_tlb_cpu(cpu); - } + perfc_incr(need_flush_tlb_flush); + flush_tlb_cpu(p->processor); } } @@ -803,7 +793,8 @@ static int do_extended_command(unsigned long ptr, unsigned long val) unsigned long pfn = ptr >> PAGE_SHIFT; unsigned long old_base_pfn; struct pfn_info *page = &frame_table[pfn]; - struct domain *p = current, *q; + struct domain *d = current, *nd, *e; + u32 x, y; domid_t domid; switch ( cmd ) @@ -853,18 +844,18 @@ static int do_extended_command(unsigned long ptr, unsigned long val) break; case MMUEXT_NEW_BASEPTR: - okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, p); + okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, d); if ( likely(okay) ) { invalidate_shadow_ldt(); percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; - old_base_pfn = pagetable_val(p->mm.pagetable) >> PAGE_SHIFT; - p->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT); + old_base_pfn = 
pagetable_val(d->mm.pagetable) >> PAGE_SHIFT; + d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT); - shadow_mk_pagetable(&p->mm); + shadow_mk_pagetable(&d->mm); - write_ptbase(&p->mm); + write_ptbase(&d->mm); put_page_and_type(&frame_table[old_base_pfn]); @@ -900,13 +891,13 @@ static int do_extended_command(unsigned long ptr, unsigned long val) okay = 0; MEM_LOG("Bad args to SET_LDT: ptr=%08lx, ents=%08lx", ptr, ents); } - else if ( (p->mm.ldt_ents != ents) || - (p->mm.ldt_base != ptr) ) + else if ( (d->mm.ldt_ents != ents) || + (d->mm.ldt_base != ptr) ) { invalidate_shadow_ldt(); - p->mm.ldt_base = ptr; - p->mm.ldt_ents = ents; - load_LDT(p); + d->mm.ldt_base = ptr; + d->mm.ldt_ents = ents; + load_LDT(d); percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT; if ( ents != 0 ) percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT; @@ -917,10 +908,10 @@ static int do_extended_command(unsigned long ptr, unsigned long val) case MMUEXT_SET_SUBJECTDOM: domid = ((domid_t)((ptr&~0xFFFF)|(val>>16))); - if ( !IS_PRIV(p) ) + if ( !IS_PRIV(d) ) { MEM_LOG("Dom %u has no privilege to set subject domain", - p->domain); + d->domain); okay = 0; } else @@ -939,31 +930,89 @@ static int do_extended_command(unsigned long ptr, unsigned long val) break; case MMUEXT_REASSIGN_PAGE: - if ( unlikely(!IS_PRIV(p)) ) + if ( unlikely(!IS_PRIV(d)) ) + { + MEM_LOG("Dom %u has no reassignment priv", d->domain); + okay = 0; + break; + } + + if ( unlikely((e = percpu_info[cpu].gps) == NULL) ) { - MEM_LOG("Dom %u has no privilege to reassign page ownership", - p->domain); + MEM_LOG("No GPS to reassign pfn %08lx to\n", pfn); okay = 0; + break; } - else if ( likely((q = percpu_info[cpu].gps) != NULL) && - likely(test_bit(_PGC_allocated, &page->count_and_flags)) && - likely(page->u.domain == p) ) /* won't be smp-guest safe */ + + /* + * Grab both page_list locks, in order. 
This prevents the page from + * disappearing elsewhere while we modify the owner, and we'll need + * both locks if we're successful so that we can change lists. + */ + if ( d < e ) { - spin_lock(&p->page_list_lock); - p->tot_pages--; - list_del(&page->list); - spin_unlock(&p->page_list_lock); - page->u.domain = q; - spin_lock(&q->page_list_lock); - q->tot_pages++; - list_add_tail(&page->list, &q->page_list); - spin_unlock(&q->page_list_lock); + spin_lock(&d->page_list_lock); + spin_lock(&e->page_list_lock); } else { - MEM_LOG("No GPS to reassign pfn %08lx to\n", pfn); + spin_lock(&e->page_list_lock); + spin_lock(&d->page_list_lock); + } + + /* A domain shouldn't have PGC_allocated pages when it is dying. */ + if ( unlikely(test_bit(DF_DYING, &e->flags)) ) + { okay = 0; + goto reassign_fail; } + + /* + * The tricky bit: atomically change owner while there is just one + * benign reference to the page (PGC_allocated). If that reference + * disappears then the deallocation routine will safely spin. + */ + nd = page->u.domain; + y = page->count_and_flags; + do { + x = y; + if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != + (1|PGC_allocated)) || + unlikely(nd != d) ) + { + MEM_LOG("Bad page values %08lx: ed=%p(%u), sd=%p," + " caf=%08x, taf=%08x\n", page_to_pfn(page), + d, d->domain, nd, x, page->type_and_flags); + okay = 0; + goto reassign_fail; + } + __asm__ __volatile__( + LOCK_PREFIX "cmpxchg8b %3" + : "=a" (nd), "=d" (y), "=b" (e), + "=m" (*(volatile u64 *)(&page->u.domain)) + : "0" (d), "1" (x), "b" (e), "c" (x) ); + } + while ( unlikely(nd != d) || unlikely(y != x) ); + + /* + * Unlink from 'd'. We transferred at least one reference to 'e', so + * noone else is spinning to try to delete this page from 'd'. + */ + d->tot_pages--; + list_del(&page->list); + + /* + * Add the page to 'e'. Someone may already have removed the last + * reference and want to remove the page from 'e'. However, we have + * the lock so they'll spin waiting for us. 
+ */ + if ( unlikely(e->tot_pages++ == 0) ) + get_domain(e); + list_add_tail(&page->list, &e->page_list); + + reassign_fail: + spin_unlock(&d->page_list_lock); + spin_unlock(&e->page_list_lock); break; case MMUEXT_RESET_SUBJECTDOM: @@ -1228,14 +1277,14 @@ int do_update_va_mapping_otherdomain(unsigned long page_nr, domid_t domid) { unsigned int cpu = smp_processor_id(); - struct domain *p; + struct domain *d; int rc; if ( unlikely(!IS_PRIV(current)) ) return -EPERM; - percpu_info[cpu].gps = p = find_domain_by_id(domid); - if ( unlikely(p == NULL) ) + percpu_info[cpu].gps = d = find_domain_by_id(domid); + if ( unlikely(d == NULL) ) { MEM_LOG("Unknown domain '%u'", domid); return -ESRCH; @@ -1243,7 +1292,7 @@ int do_update_va_mapping_otherdomain(unsigned long page_nr, rc = do_update_va_mapping(page_nr, val, flags); - put_domain(p); + put_domain(d); percpu_info[cpu].gps = NULL; return rc; @@ -1257,8 +1306,6 @@ int do_update_va_mapping_otherdomain(unsigned long page_nr, * audit_page(): in addition maintains a history of audited pages * reaudit_pages(): re-audit previously audited pages * audit_all_pages(): check the ref-count for all leaf pages - * also checks for zombie pages - * * reaudit_page() and audit_all_pages() are designed to be * keyhandler functions so that they can be easily invoked from the console. */ @@ -1285,8 +1332,6 @@ void __audit_page(unsigned long pfn) { { if ( (frame_table[i].count_and_flags & PGC_count_mask) == 0 ) continue; - if ( (frame_table[i].count_and_flags & PGC_zombie) != 0 ) - continue; /* check if entry is a page table (L1 page table) and in use */ if ( ((frame_table[i].type_and_flags & PGT_type_mask) == @@ -1359,7 +1404,6 @@ void reaudit_pages(u_char key, void *dev_id, struct pt_regs *regs) /* * do various checks on all pages. * Currently: - * - check for zombie pages * - check for pages with corrupt ref-count * Interrupts are diabled completely. use with care. 
*/ @@ -1376,16 +1420,6 @@ void audit_all_pages(u_char key, void *dev_id, struct pt_regs *regs) /* walk the frame table */ for ( i = 0; i < max_page; i++ ) { - /* check for zombies */ - if ( ((frame_table[i].count_and_flags & PGC_count_mask) != 0) && - ((frame_table[i].count_and_flags & PGC_zombie) != 0) ) - { - printk("zombie: pfn=%08lx cf=%08x tf=%08x dom=%08lx\n", - i, frame_table[i].count_and_flags, - frame_table[i].type_and_flags, - (unsigned long)frame_table[i].u.domain); - } - /* check ref count for leaf pages */ if ( ((frame_table[i].type_and_flags & PGT_type_mask) == PGT_writeable_page) ) diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c index 474aa3b049..f675d03049 100644 --- a/xen/drivers/char/console.c +++ b/xen/drivers/char/console.c @@ -290,7 +290,7 @@ long do_console_io(int cmd, int count, char *buffer) long rc; #ifdef NDEBUG - /* Only domain-0 may access the emrgency console. */ + /* Only domain-0 may access the emergency console. */ if ( current->domain != 0 ) return -EPERM; #endif @@ -445,7 +445,7 @@ long do_console_write(char *str, unsigned int count) return 0; #else - if ( !test_and_set_bit(DF_CONSOLEWRITEBUG, &current->flags) ) + if ( !test_and_set_bit(DF_CONWRITEBUG, &current->flags) ) { printk("DOM%u is attempting to use the deprecated " "HYPERVISOR_console_write() interface.\n", current->domain); diff --git a/xen/include/asm-x86/spinlock.h b/xen/include/asm-x86/spinlock.h index 5cbb5a413e..dd9869d331 100644 --- a/xen/include/asm-x86/spinlock.h +++ b/xen/include/asm-x86/spinlock.h @@ -6,200 +6,123 @@ #include #include -#if 0 -#define SPINLOCK_DEBUG 1 -#else -#define SPINLOCK_DEBUG 0 -#endif - -/* - * Your basic SMP spinlocks, allowing only a single CPU anywhere - */ - typedef struct { - volatile unsigned int lock; -#if SPINLOCK_DEBUG - unsigned magic; -#endif + volatile s16 lock; + s8 recurse_cpu; + u8 recurse_cnt; } spinlock_t; -#define SPINLOCK_MAGIC 0xdead4ead - -#if SPINLOCK_DEBUG -#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC -#else 
-#define SPINLOCK_MAGIC_INIT /* */ -#endif - -#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT } +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1, -1, 0 } #define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) - -/* - * Simple spin lock operations. There are two variants, one clears IRQ's - * on the local processor, one does not. - * - * We make no fairness assumptions. They have a cost. - */ - #define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0) -#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) - -#define spin_lock_string \ - "\n1:\t" \ - "lock ; decb %0\n\t" \ - "js 2f\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\t" \ - "cmpb $0,%0\n\t" \ - "rep;nop\n\t" \ - "jle 2b\n\t" \ - "jmp 1b\n" \ - ".previous" - -/* - * This works. Despite all the confusion. - * (except on PPro SMP or if we are using OOSTORE) - * (PPro errata 66, 92) - */ - -#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) -#define spin_unlock_string \ - "movb $1,%0" \ - :"=m" (lock->lock) : : "memory" - - -static inline void spin_unlock(spinlock_t *lock) +static inline void spin_lock(spinlock_t *lock) { -#if SPINLOCK_DEBUG - if (lock->magic != SPINLOCK_MAGIC) - BUG(); - if (!spin_is_locked(lock)) - BUG(); -#endif - __asm__ __volatile__( - spin_unlock_string - ); + __asm__ __volatile__ ( + "1: lock; decb %0 \n" + " js 2f \n" + ".section .text.lock,\"ax\"\n" + "2: cmpb $0,%0 \n" + " rep; nop \n" + " jle 2b \n" + " jmp 1b \n" + ".previous" + : "=m" (lock->lock) : : "memory" ); } -#else - -#define spin_unlock_string \ - "xchgb %b0, %1" \ - :"=q" (oldval), "=m" (lock->lock) \ - :"0" (oldval) : "memory" - static inline void spin_unlock(spinlock_t *lock) { - char oldval = 1; -#if SPINLOCK_DEBUG - if (lock->magic != SPINLOCK_MAGIC) - BUG(); - if (!spin_is_locked(lock)) - BUG(); +#if !defined(CONFIG_X86_OOSTORE) + ASSERT(spin_is_locked(lock)); + __asm__ __volatile__ ( + "movb $1,%0" + : "=m" (lock->lock) : : "memory" ); +#else + char 
oldval = 1; + ASSERT(spin_is_locked(lock)); + __asm__ __volatile__ ( + "xchgb %b0, %1" + : "=q" (oldval), "=m" (lock->lock) : "0" (oldval) : "memory" ); #endif - __asm__ __volatile__( - spin_unlock_string - ); } -#endif - static inline int spin_trylock(spinlock_t *lock) { - char oldval; - __asm__ __volatile__( - "xchgb %b0,%1" - :"=q" (oldval), "=m" (lock->lock) - :"0" (0) : "memory"); - return oldval > 0; -} - -static inline void spin_lock(spinlock_t *lock) -{ -#if SPINLOCK_DEBUG - __label__ here; -here: - if (lock->magic != SPINLOCK_MAGIC) { -printk("eip: %p\n", &&here); - BUG(); - } -#endif - __asm__ __volatile__( - spin_lock_string - :"=m" (lock->lock) : : "memory"); + char oldval; + __asm__ __volatile__( + "xchgb %b0,%1" + :"=q" (oldval), "=m" (lock->lock) + :"0" (0) : "memory"); + return oldval > 0; } - /* - * Read-write spinlocks, allowing multiple readers - * but only one writer. - * - * NOTE! it is quite common to have readers in interrupts - * but no interrupt writers. For those circumstances we - * can "mix" irq-safe locks - any writer needs to get a - * irq-safe write-lock, but readers can get non-irqsafe - * read-locks. + * spin_[un]lock_recursive(): Use these forms when the lock can (safely!) be + * reentered recursively on the same CPU. All critical regions that may form + * part of a recursively-nested set must be protected by these forms. If there + * are any critical regions that cannot form part of such a set, they can use + * standard spin_[un]lock(). 
*/ -typedef struct { - volatile unsigned int lock; -#if SPINLOCK_DEBUG - unsigned magic; -#endif -} rwlock_t; +#define spin_lock_recursive(_lock) \ + do { \ + int cpu = smp_processor_id(); \ + if ( likely((_lock)->recurse_cpu != cpu) ) \ + { \ + spin_lock(_lock); \ + (_lock)->recurse_cpu = cpu; \ + } \ + (_lock)->recurse_cnt++; \ + } while ( 0 ) + +#define spin_unlock_recursive(_lock) \ + do { \ + if ( likely(--(_lock)->recurse_cnt == 0) ) \ + { \ + (_lock)->recurse_cpu = -1; \ + spin_unlock(_lock); \ + } \ + } while ( 0 ) -#define RWLOCK_MAGIC 0xdeaf1eed -#if SPINLOCK_DEBUG -#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC -#else -#define RWLOCK_MAGIC_INIT /* */ -#endif +typedef struct { + volatile unsigned int lock; +} rwlock_t; -#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } +#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS } #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) /* * On x86, we implement read-write locks as a 32-bit counter * with the high bit (sign) being the "contended" bit. - * - * The inline assembly is non-obvious. Think about it. - * - * Changed to use the same technique as rw semaphores. See - * semaphore.h for details. 
-ben */ -/* the spinlock helpers are in arch/x86/kernel/semaphore.c */ - static inline void read_lock(rwlock_t *rw) { -#if SPINLOCK_DEBUG - if (rw->magic != RWLOCK_MAGIC) - BUG(); -#endif - __build_read_lock(rw, "__read_lock_failed"); + __build_read_lock(rw, "__read_lock_failed"); } static inline void write_lock(rwlock_t *rw) { -#if SPINLOCK_DEBUG - if (rw->magic != RWLOCK_MAGIC) - BUG(); -#endif - __build_write_lock(rw, "__write_lock_failed"); + __build_write_lock(rw, "__write_lock_failed"); } -#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") -#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") +#define read_unlock(rw) \ + __asm__ __volatile__ ( \ + "lock ; incl %0" : \ + "=m" ((rw)->lock) : : "memory" ) +#define write_unlock(rw) \ + __asm__ __volatile__ ( \ + "lock ; addl $" RW_LOCK_BIAS_STR ",%0" : \ + "=m" ((rw)->lock) : : "memory" ) static inline int write_trylock(rwlock_t *lock) { - atomic_t *count = (atomic_t *)lock; - if (atomic_sub_and_test(RW_LOCK_BIAS, count)) - return 1; - atomic_add(RW_LOCK_BIAS, count); - return 0; + atomic_t *count = (atomic_t *)lock; + if ( atomic_sub_and_test(RW_LOCK_BIAS, count) ) + return 1; + atomic_add(RW_LOCK_BIAS, count); + return 0; } #endif /* __ASM_SPINLOCK_H */ diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h index 17ae182e66..f23ad5f3f4 100644 --- a/xen/include/xen/mm.h +++ b/xen/include/xen/mm.h @@ -54,12 +54,10 @@ struct pfn_info struct list_head list; /* The following possible uses are context-dependent. */ union { - /* Page is in use and not a zombie: we keep a pointer to its owner. */ + /* Page is in use: we keep a pointer to its owner. */ struct domain *domain; /* Page is not currently allocated: mask of possibly-tainted TLBs. */ unsigned long cpu_mask; - /* Page is a zombie: this word currently has no use. */ - unsigned long _unused; } u; /* Reference count and various PGC_xxx flags and fields. 
*/ u32 count_and_flags; @@ -85,20 +83,17 @@ struct pfn_info /* 28-bit count of uses of this frame as its current type. */ #define PGT_count_mask ((1<<28)-1) - /* The owner of this page is dead: 'u.domain' is no longer valid. */ -#define _PGC_zombie 31 -#define PGC_zombie (1<<_PGC_zombie) /* For safety, force a TLB flush when this page's type changes. */ -#define _PGC_tlb_flush_on_type_change 30 +#define _PGC_tlb_flush_on_type_change 31 #define PGC_tlb_flush_on_type_change (1<<_PGC_tlb_flush_on_type_change) /* Owning guest has pinned this page to its current type? */ -#define _PGC_guest_pinned 29 +#define _PGC_guest_pinned 30 #define PGC_guest_pinned (1<<_PGC_guest_pinned) /* Cleared when the owning guest 'frees' this page. */ -#define _PGC_allocated 28 +#define _PGC_allocated 29 #define PGC_allocated (1<<_PGC_allocated) /* 28-bit count of references to this frame. */ -#define PGC_count_mask ((1<<28)-1) +#define PGC_count_mask ((1<<29)-1) /* We trust the slab allocator in slab.c, and our use of it. */ @@ -160,12 +155,11 @@ static inline int get_page(struct pfn_info *page, p = np; if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */ unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */ - unlikely(x & PGC_zombie) || /* Zombie? */ unlikely(p != domain) ) /* Wrong owner? 
*/ { DPRINTK("Error pfn %08lx: ed=%p(%u), sd=%p(%u)," " caf=%08x, taf=%08x\n", - page_to_pfn(page), domain, (domain)?domain->domain:999, + page_to_pfn(page), domain, domain->domain, p, (p && !((x & PGC_count_mask) == 0))?p->domain:999, x, page->type_and_flags); return 0; @@ -173,7 +167,7 @@ static inline int get_page(struct pfn_info *page, __asm__ __volatile__( LOCK_PREFIX "cmpxchg8b %3" : "=a" (np), "=d" (y), "=b" (p), - "=m" (*(volatile unsigned long long *)(&page->u.domain)) + "=m" (*(volatile u64 *)(&page->u.domain)) : "0" (p), "1" (x), "b" (p), "c" (nx) ); } while ( unlikely(np != p) || unlikely(y != x) ); diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 467a6878f5..0bb64967d4 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -194,8 +194,8 @@ extern void domain_destruct(struct domain *d); extern void domain_kill(struct domain *d); extern void domain_crash(void); extern void domain_shutdown(u8 reason); +extern void domain_relinquish_memory(struct domain *d); -/* arch/process.c */ void new_thread(struct domain *d, unsigned long start_pc, unsigned long start_stack, @@ -206,9 +206,6 @@ extern unsigned long wait_init_idle; extern spinlock_t schedule_lock[NR_CPUS] __cacheline_aligned; -/* - * Scheduler functions (in schedule.c) - */ #define set_current_state(_s) do { current->state = (_s); } while (0) void scheduler_init(void); void schedulers_start(void); @@ -226,7 +223,6 @@ void __enter_scheduler(void); extern void switch_to(struct domain *prev, struct domain *next); - void domain_init(void); int idle_cpu(int cpu); /* Is CPU 'cpu' idle right now? */ @@ -251,7 +247,7 @@ extern struct domain *task_list; #define DF_CONSTRUCTED 3 /* Has the guest OS been fully built yet? */ #define DF_IDLETASK 4 /* Is this one of the per-CPU idle domains? */ #define DF_PRIVILEGED 5 /* Is this domain privileged? */ -#define DF_CONSOLEWRITEBUG 6 /* Has this domain used the obsolete console? 
*/ +#define DF_CONWRITEBUG 6 /* Has this domain used the obsolete console? */ #define DF_PHYSDEV 7 /* May this domain do IO to physical devices? */ #define DF_BLOCKED 8 /* Domain is blocked waiting for an event. */ #define DF_CTRLPAUSE 9 /* Domain is paused by controller software. */ diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h index 08dc3c1d97..206f3c1762 100644 --- a/xen/include/xen/spinlock.h +++ b/xen/include/xen/spinlock.h @@ -1,132 +1,74 @@ -#ifndef __LINUX_SPINLOCK_H -#define __LINUX_SPINLOCK_H +#ifndef __SPINLOCK_H__ +#define __SPINLOCK_H__ #include #include -/* - * These are the generic versions of the spinlocks and read-write - * locks.. - */ -#define spin_lock_irqsave(lock, flags) do { local_irq_save(flags); spin_lock(lock); } while (0) -#define spin_lock_irq(lock) do { local_irq_disable(); spin_lock(lock); } while (0) +#define spin_lock_irqsave(lock, flags) \ + do { local_irq_save(flags); spin_lock(lock); } while ( 0 ) +#define spin_lock_irq(lock) \ + do { local_irq_disable(); spin_lock(lock); } while ( 0 ) -#define read_lock_irqsave(lock, flags) do { local_irq_save(flags); read_lock(lock); } while (0) -#define read_lock_irq(lock) do { local_irq_disable(); read_lock(lock); } while (0) +#define read_lock_irqsave(lock, flags) \ + do { local_irq_save(flags); read_lock(lock); } while ( 0 ) +#define read_lock_irq(lock) \ + do { local_irq_disable(); read_lock(lock); } while ( 0 ) -#define write_lock_irqsave(lock, flags) do { local_irq_save(flags); write_lock(lock); } while (0) -#define write_lock_irq(lock) do { local_irq_disable(); write_lock(lock); } while (0) +#define write_lock_irqsave(lock, flags) \ + do { local_irq_save(flags); write_lock(lock); } while ( 0 ) +#define write_lock_irq(lock) \ + do { local_irq_disable(); write_lock(lock); } while ( 0 ) -#define spin_unlock_irqrestore(lock, flags) do { spin_unlock(lock); local_irq_restore(flags); } while (0) -#define spin_unlock_irq(lock) do { spin_unlock(lock); local_irq_enable(); } 
while (0) +#define spin_unlock_irqrestore(lock, flags) \ + do { spin_unlock(lock); local_irq_restore(flags); } while ( 0 ) +#define spin_unlock_irq(lock) \ + do { spin_unlock(lock); local_irq_enable(); } while ( 0 ) -#define read_unlock_irqrestore(lock, flags) do { read_unlock(lock); local_irq_restore(flags); } while (0) -#define read_unlock_irq(lock) do { read_unlock(lock); local_irq_enable(); } while (0) +#define read_unlock_irqrestore(lock, flags) \ + do { read_unlock(lock); local_irq_restore(flags); } while ( 0 ) +#define read_unlock_irq(lock) \ + do { read_unlock(lock); local_irq_enable(); } while ( 0 ) -#define write_unlock_irqrestore(lock, flags) do { write_unlock(lock); local_irq_restore(flags); } while (0) -#define write_unlock_irq(lock) do { write_unlock(lock); local_irq_enable(); } while (0) +#define write_unlock_irqrestore(lock, flags) \ + do { write_unlock(lock); local_irq_restore(flags); } while ( 0 ) +#define write_unlock_irq(lock) \ + do { write_unlock(lock); local_irq_enable(); } while ( 0 ) #ifdef CONFIG_SMP -#include - -#elif !defined(spin_lock_init) /* !SMP and spin_lock_init not previously - defined (e.g. by including asm/spinlock.h */ - -#define DEBUG_SPINLOCKS 0 /* 0 == no debugging, 1 == maintain lock state, 2 == full debug */ -#if (DEBUG_SPINLOCKS < 1) +#include -#define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic) -#define ATOMIC_DEC_AND_LOCK +#else -/* - * Your basic spinlocks, allowing only a single CPU anywhere - * - * Most gcc versions have a nasty bug with empty initializers. 
- */ #if (__GNUC__ > 2) - typedef struct { } spinlock_t; - #define SPIN_LOCK_UNLOCKED (spinlock_t) { } +typedef struct { } spinlock_t; +#define SPIN_LOCK_UNLOCKED (spinlock_t) { } #else - typedef struct { int gcc_is_buggy; } spinlock_t; - #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } +typedef struct { int gcc_is_buggy; } spinlock_t; +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } #endif -#define spin_lock_init(lock) do { } while(0) -#define spin_lock(lock) (void)(lock) /* Not "unused variable". */ -#define spin_is_locked(lock) (0) -#define spin_trylock(lock) ({1; }) -#define spin_unlock_wait(lock) do { } while(0) -#define spin_unlock(lock) do { } while(0) - -#elif (DEBUG_SPINLOCKS < 2) +#define spin_lock_init(lock) do { } while(0) +#define spin_lock(lock) (void)(lock) /* Not "unused variable". */ +#define spin_is_locked(lock) (0) +#define spin_trylock(lock) ({1; }) +#define spin_unlock_wait(lock) do { } while(0) +#define spin_unlock(lock) do { } while(0) -typedef struct { - volatile unsigned long lock; -} spinlock_t; -#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } - -#define spin_lock_init(x) do { (x)->lock = 0; } while (0) -#define spin_is_locked(lock) (test_bit(0,(lock))) -#define spin_trylock(lock) (!test_and_set_bit(0,(lock))) - -#define spin_lock(x) do { (x)->lock = 1; } while (0) -#define spin_unlock_wait(x) do { } while (0) -#define spin_unlock(x) do { (x)->lock = 0; } while (0) - -#else /* (DEBUG_SPINLOCKS >= 2) */ - -typedef struct { - volatile unsigned long lock; - volatile unsigned int babble; - const char *module; -} spinlock_t; -#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0, 25, __BASE_FILE__ } - -/*#include */ - -#define spin_lock_init(x) do { (x)->lock = 0; } while (0) -#define spin_is_locked(lock) (test_bit(0,(lock))) -#define spin_trylock(lock) (!test_and_set_bit(0,(lock))) - -#define spin_lock(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_lock(%s:%p) already locked\n", 
__BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 1; restore_flags(__spinflags);} while (0) -#define spin_unlock_wait(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock_wait(%s:%p) deadlock\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} restore_flags(__spinflags);} while (0) -#define spin_unlock(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if (!(x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock(%s:%p) not locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 0; restore_flags(__spinflags);} while (0) - -#endif /* DEBUG_SPINLOCKS */ - -/* - * Read-write spinlocks, allowing multiple readers - * but only one writer. - * - * NOTE! it is quite common to have readers in interrupts - * but no interrupt writers. For those circumstances we - * can "mix" irq-safe locks - any writer needs to get a - * irq-safe write-lock, but readers can get non-irqsafe - * read-locks. - * - * Most gcc versions have a nasty bug with empty initializers. - */ #if (__GNUC__ > 2) - typedef struct { } rwlock_t; - #define RW_LOCK_UNLOCKED (rwlock_t) { } +typedef struct { } rwlock_t; +#define RW_LOCK_UNLOCKED (rwlock_t) { } #else - typedef struct { int gcc_is_buggy; } rwlock_t; - #define RW_LOCK_UNLOCKED (rwlock_t) { 0 } +typedef struct { int gcc_is_buggy; } rwlock_t; +#define RW_LOCK_UNLOCKED (rwlock_t) { 0 } #endif -#define rwlock_init(lock) do { } while(0) -#define read_lock(lock) (void)(lock) /* Not "unused variable". */ -#define read_unlock(lock) do { } while(0) -#define write_lock(lock) (void)(lock) /* Not "unused variable". */ -#define write_unlock(lock) do { } while(0) - -#endif /* !SMP */ +#define rwlock_init(lock) do { } while(0) +#define read_lock(lock) (void)(lock) /* Not "unused variable". */ +#define read_unlock(lock) do { } while(0) +#define write_lock(lock) (void)(lock) /* Not "unused variable". 
*/ +#define write_unlock(lock) do { } while(0) -/* "lock on reference count zero" */ -#ifndef ATOMIC_DEC_AND_LOCK -#include -extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #endif -#endif /* __LINUX_SPINLOCK_H */ +#endif /* __SPINLOCK_H__ */ -- 2.30.2